import pandas as pd
# Find and remove duplicate rows from the student sheet.
students = pd.read_excel('Students_Duplicates.xlsx')

# duplicated(subset='Name') flags duplicates based on the 'Name' column only.
# It returns a boolean Series aligned with the frame's index; with the default
# keep='first', every occurrence after the first one is marked True.
dupe = students.duplicated(subset='Name')
print(dupe)

# Keep only the True entries, i.e. the rows flagged as duplicates.
dupe = dupe[dupe]

# dupe.index holds index *labels*, so select with the label-based .loc rather
# than the positional .iloc; the two only coincide for a default RangeIndex.
print(students.loc[dupe.index])
print("=========")

# Drop the duplicates in place. `keep` chooses which occurrence survives:
# 'first' or 'last'.
# NOTE: keep='last' retains the final occurrence of each name, whereas the
# rows printed above are the later occurrences (duplicated() used its default
# keep='first'), so the rows removed here are the earlier ones.
students.drop_duplicates(subset='Name', inplace=True, keep='last')
print(students)
